# -*- coding: cp936 -*-

#FileName:  PicCollector_Baidu.py
#Author:    Pang Liang
#Time:      2012-9-24
#Description: Get 20 pictures per keyword from Baidu Image, using the keyword list in list.txt
import os, urllib
import re
import time

class AppURLopener(urllib.FancyURLopener):
    """URL opener that identifies itself with a browser-like User-Agent.

    urllib's opener classes take the User-Agent request header from the
    ``version`` class attribute, so overriding it here replaces urllib's
    default identification string.
    """
    version = "Mozilla/5.0"

def c2u(x):
    """Re-encode a cp936 (GBK) byte string as a UTF-8 byte string."""
    # Go through unicode: decode from the source code page first,
    # then encode into the target encoding.
    text = x.decode('cp936')
    return text.encode('utf-8')

def DownloadPictureFromGoogle(Tag,PicNum):
    """Download Google Images thumbnails for a keyword into a directory.

    Tag    -- keyword as a cp936-encoded byte string.  It is used as-is for
              the output directory name and converted to UTF-8 for the
              search query.
    PicNum -- number of results requested from Google (sent as the
              ``num`` query parameter).

    Thumbnails are saved as <Tag>/1.jpg, <Tag>/2.jpg, ... in the order in
    which their URLs appear in the result page.  Network and file errors
    are not caught here; they propagate to the caller.
    """
    dirname = Tag
    Tag = c2u(Tag)
    if not os.path.isdir(dirname):
        os.mkdir(dirname)
    # Make urllib's module-level helpers (urlopen / urlretrieve) use an
    # opener that sends a browser-like User-Agent.
    urllib._urlopener = AppURLopener()
    query = urllib.quote(Tag)
    url = ("http://www.google.com.hk/search?num=" + str(PicNum) +
           "&hl=en&newwindow=1&safe=strict&site=imghp&tbm=isch&source=hp&biw=1366&bih=581&q=" +
           query + "&oq=" + query +
           "&gs_l=img.3...4458.4458.0.4776.1.1.0.0.0.0.0.0..0.0...0.0...1ac.1j4.8ch6oBtzfRQ")
    print(url)
    # Close the response explicitly so the connection is not leaked.
    response = urllib.urlopen(url)
    try:
        page = response.read()
    finally:
        response.close()
    # Thumbnail URLs are quoted strings on hosts t<x>.gstatic.com; the dots
    # of the host name are escaped so they only match a literal '.'.
    matches = re.findall(r'\"(http://t.\.gstatic\.com/images\?q=tbn:.+?)\"', page)
    print(matches)
    for index, thumb_url in enumerate(matches, 1):
        filename = dirname + "/" + str(index) + '.jpg'
        urllib.urlretrieve(thumb_url, filename)
        # Pause between downloads to avoid hammering the server.
        time.sleep(1)
        print(thumb_url)

def DownloadPictureFromBaidu(Tag,PicNum):
    """Download Baidu Image thumbnails for a keyword into a directory.

    Tag    -- keyword as a cp936-encoded byte string.  It is used as-is for
              the output directory name and converted to UTF-8 for the
              search query.
    PicNum -- maximum number of thumbnails to download.

    Thumbnails are saved as <Tag>/1.jpg, <Tag>/2.jpg, ... in the order in
    which their URLs appear in the result page.  Network and file errors
    are not caught here; they propagate to the caller.
    """
    dirname = Tag
    Tag = c2u(Tag)
    if not os.path.isdir(dirname):
        os.mkdir(dirname)
    # Make urllib's module-level helpers (urlopen / urlretrieve) use an
    # opener that sends a browser-like User-Agent.
    urllib._urlopener = AppURLopener()
    # addheader() takes the header name and value as separate arguments;
    # passing "Name:value" as one string yields an invalid header line.
    urllib._urlopener.addheader("Referer", "http://image.baidu.com")
    # NOTE(review): "oq=pingg" looks like a leftover from a manual search;
    # kept unchanged because its effect on the results is unknown.
    url = ("http://image.baidu.com/i?ct=201326592&cl=2&nc=1&lm=-1&st=-1&tn=baiduimage&istype=2&fm=index&pv=&z=0&word=" +
           urllib.quote(Tag) + "&oq=pingg&f=3&rsp=0&ie=utf-8")
    print(url)
    # Close the response explicitly so the connection is not leaked.
    response = urllib.urlopen(url)
    try:
        page = response.read()
    finally:
        response.close()
    # Thumbnail URLs are quoted strings on hosts t<x>.baidu.com; the dots
    # of the host name are escaped so they only match a literal '.'.
    matches = re.findall(r'\"(http://t.\.baidu\.com/it/u=[0-9]+,[0-9]+&fm=0&gp=0\.jpg)\"', page)
    print(matches)
    # The result page may list more thumbnails than requested; honour PicNum.
    for index, thumb_url in enumerate(matches[:PicNum], 1):
        print(thumb_url)
        filename = dirname + "/" + str(index) + '.jpg'
        urllib.urlretrieve(thumb_url, filename)
        # Pause between downloads to avoid hammering the server.
        time.sleep(0.5)
        
# Script driver: list.txt holds one keyword per line; download 20 pictures
# for each keyword from Baidu.
with open('list.txt') as keyword_file:
    for KeyWord in keyword_file:
        KeyWord = KeyWord.strip()
        if not KeyWord:
            # A blank line would otherwise become an empty directory name.
            continue
        try:
            DownloadPictureFromBaidu(KeyWord, 20)
        except Exception as err:
            # Best effort: report the failing keyword and carry on with the
            # next one.  Catching Exception (not a bare except) keeps Ctrl-C
            # and SystemExit able to stop the script.
            print('Error!')
            print(KeyWord)
            print(err)
        else:
            # Short pause between keywords after a successful run.
            time.sleep(1)
